In this analysis, we demonstrate that MitoTrace could identify the germline mutations on single-cell Smart-seq2 data which publicly available from Darmanis et al. (Darmanis et al. 2017).

Load R libraries

library(MitoTrace)
library(pheatmap)

Read the BAM files, these BAM files can be downloaded from the link: (https://drive.google.com/open?id=1oGK9_RE5t2CkzFKlg24dhaKmItmgRmMt).

bams <- list.files("../Data/DarmanisEtAl/bam_file", full.names = T, pattern = ".bam$")

Read the human GRCH38 Mitochondrial reference genome

fasta_loc <- "../Data/GRCH38_MT.fa"

MitoTrace calculates alternative allele counts and read coverage for each nucleotide position

mae_res <- MitoTrace(bam_list = bams, fasta = fasta_loc)

MitoTrace calculate the allele frequencies

af <- calc_allele_frequency(mae_res)
colnames(af) <- unlist(lapply(colnames(af), function(x) { a <- strsplit(x, "\\.") [[1]][1]}))

Read the relevant label files originated from the publication (Darmanis et al. 2017).

ssr_list <- read.table("../Data/DarmanisEtAl/annotation_file/SRR_Patient_all_list_sorted", sep = "\t")
ok <- intersect(colnames(af), ssr_list$V1)
ssr_list <- ssr_list[match(colnames(af), as.character(ssr_list$V1)),]

Read the plate relation file

plate_relation <- read.table("../Data/DarmanisEtAl/annotation_file/GBM_SRA_plate_relation", sep = "\t", header = T)
plate_relation <- plate_relation[match(colnames(af), as.character(plate_relation$sra)),]

Read metadata file

meta <- read.table("../Data/DarmanisEtAl/annotation_file/GBM_metadata.csv")

Read the geo file

geo <- read.delim("../Data/DarmanisEtAl/annotation_file/FromGeoSpreadsheet.tsv", row.names = 1)
geo <- geo[as.character(plate_relation$plate.well),]

Read the tSNE matrix

tsne <- read.table("../Data/DarmanisEtAl/annotation_file/GBM_TSNE.csv")

Split by allele frequency

asplit <- split(as.character(ssr_list$V1), ssr_list$V2)
sample_af_means <- do.call(cbind, lapply(asplit, function(x){
  subm_af <- af[,x]
  rowMeans(subm_af, na.rm = T)
}))
cutoff <- 0.75
ok <- which(apply(sample_af_means, 1, function(x) sum(x > cutoff)) > 0)

Identify informative germline mutations using AOV

pvals <- apply(af[ok,], 1, function(x) try(summary(aov(x ~ geo$V8))[[1]][1, 5]))
ok <- names(which(pvals < 1e-100))

Generate the heatmap to plot the data

asplit <- split(colnames(af), geo$V8)
samples_ok <- unlist(lapply(asplit, function(x) sample(x, 200)))
indices_ok <- match(samples_ok, colnames(af))
tmp <- af[ok, indices_ok]
anno_col <- data.frame(patient = geo$V8[indices_ok])
rownames(anno_col) <- colnames(tmp)
pheatmap(tmp, show_colnames = F, annotation_col = anno_col, cluster_cols = T)

Highlight some identified germline mutation sites

par(mfrow = c(1,4))
boxplot(split(tmp["11864T>C",], anno_col$patient == "BT_S1"), main = "11864T>C", xlab = "BT_S1", ylab = "Alternative allele frequency")
boxplot(split(tmp["4924G>A",], anno_col$patient == "BT_S2"), main = "4924G>A", xlab = "BT_S2", ylab = "Alternative allele frequency")
boxplot(split(tmp["4790A>G",], anno_col$patient == "BT_S4"), main = "4790A>G", xlab = "BT_S4", ylab = "Alternative allele frequency")
boxplot(split(tmp["709G>A",], anno_col$patient == "BT_S6"), main = "709G>A", xlab = "BT_S6", ylab = "Alternative allele frequency")
par(mfrow = c(1,1))

tSNE visualization of the cell-cell distance matrix by mitochondrial heteroplasmy profiles separated 4 patients.

vars <- apply(af, 1, var)
ok <- names(tail(sort(vars), 500))
tmp <- data.matrix(af[ok,])
tmp <- tmp[,which(apply(tmp, 2, var) > 0)]
correl <- cor(tmp)
tData <- Rtsne::Rtsne(1 - abs(correl), perplexity = 20)
plot(tData$Y, col = as.character(meta$Sample.name.color), main = "Patient", pch =20)

tSNE visualization of the cell-cell distance matrix by cell type identify

tsne <- tsne[rownames(meta),]
plot(tsne, col = as.character(meta$Sample.name.color), pch = 16, xlab = "tSNE1", ylab = "tSNE2")

tSNE visualization of the cell cluster by cell type

plot(tsne, col = as.character(meta$Cluster_2d_color), pch = 16, xlab = "tSNE1", ylab = "tSNE2")



lkmklsmn/MitoTrace documentation built on June 29, 2023, 5:55 a.m.